

<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
  <meta charset="utf-8">
  
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  
  <title>Unsupervised Crosslingual Embeddings &mdash; NLP Architect by Intel® AI Lab 0.5.2 documentation</title>
  

  
  
  
  

  
  <script type="text/javascript" src="../_static/js/modernizr.min.js"></script>
  
    
      <script type="text/javascript" id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
        <script type="text/javascript" src="../_static/jquery.js"></script>
        <script type="text/javascript" src="../_static/underscore.js"></script>
        <script type="text/javascript" src="../_static/doctools.js"></script>
        <script type="text/javascript" src="../_static/language_data.js"></script>
        <script type="text/javascript" src="../_static/install.js"></script>
        <script async="async" type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>
    
    <script type="text/javascript" src="../_static/js/theme.js"></script>

    

  
  <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
  <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
  <link rel="stylesheet" href="../_static/nlp_arch_theme.css" type="text/css" />
  <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto+Mono" type="text/css" />
  <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Open+Sans:100,900" type="text/css" />
    <link rel="index" title="Index" href="../genindex.html" />
    <link rel="search" title="Search" href="../search.html" /> 
</head>

<body class="wy-body-for-nav">

   
  <div class="wy-grid-for-nav">
    
    <nav data-toggle="wy-nav-shift" class="wy-nav-side">
      <div class="wy-side-scroll">
        <div class="wy-side-nav-search" >
          

          
            <a href="../index.html">
          

          
            
            <img src="../_static/logo.png" class="logo" alt="Logo"/>
          
          </a>

          

          
<div role="search">
  <!-- Sidebar search: sends the query as the GET parameter "q" to ../search.html. -->
  <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
    <!-- A placeholder alone is not a reliable accessible name, so aria-label names the field for assistive technology. -->
    <input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
    <input type="hidden" name="check_keywords" value="yes" />
    <input type="hidden" name="area" value="default" />
  </form>
</div>

          
        </div>

        <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
          
            
            
              
            
            
              <ul>
<li class="toctree-l1"><a class="reference internal" href="../quick_start.html">Quick start</a></li>
<li class="toctree-l1"><a class="reference internal" href="../installation.html">Installation</a></li>
<li class="toctree-l1"><a class="reference internal" href="../publications.html">Publications</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials.html">Jupyter Tutorials</a></li>
<li class="toctree-l1"><a class="reference internal" href="../model_zoo.html">Model Zoo</a></li>
</ul>
<p class="caption"><span class="caption-text">NLP/NLU Models</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../tagging/sequence_tagging.html">Sequence Tagging</a></li>
<li class="toctree-l1"><a class="reference internal" href="../sentiment.html">Sentiment Analysis</a></li>
<li class="toctree-l1"><a class="reference internal" href="../bist_parser.html">Dependency Parsing</a></li>
<li class="toctree-l1"><a class="reference internal" href="../intent.html">Intent Extraction</a></li>
<li class="toctree-l1"><a class="reference internal" href="../lm.html">Language Models</a></li>
<li class="toctree-l1"><a class="reference internal" href="../information_extraction.html">Information Extraction</a></li>
<li class="toctree-l1"><a class="reference internal" href="../transformers.html">Transformers</a></li>
<li class="toctree-l1"><a class="reference internal" href="additional.html">Additional Models</a></li>
</ul>
<p class="caption"><span class="caption-text">Optimized Models</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../quantized_bert.html">Quantized BERT</a></li>
<li class="toctree-l1"><a class="reference internal" href="../transformers_distillation.html">Transformers Distillation</a></li>
<li class="toctree-l1"><a class="reference internal" href="../sparse_gnmt.html">Sparse Neural Machine Translation</a></li>
</ul>
<p class="caption"><span class="caption-text">Solutions</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../absa_solution.html">Aspect Based Sentiment Analysis</a></li>
<li class="toctree-l1"><a class="reference internal" href="../term_set_expansion.html">Set Expansion</a></li>
<li class="toctree-l1"><a class="reference internal" href="../trend_analysis.html">Trend Analysis</a></li>
</ul>
<p class="caption"><span class="caption-text">For Developers</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../generated_api/nlp_architect_api_index.html">nlp_architect API</a></li>
<li class="toctree-l1"><a class="reference internal" href="../developer_guide.html">Developer Guide</a></li>
</ul>

            
          
        </div>
      </div>
    </nav>

    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">

      
      <nav class="wy-nav-top" aria-label="top navigation">
        
          <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
          <a href="../index.html">NLP Architect by Intel® AI Lab</a>
        
      </nav>


      <div class="wy-nav-content">
        
        <div class="rst-content">
        
          















<div role="navigation" aria-label="breadcrumbs navigation">

  <ul class="wy-breadcrumbs">
    
      <li><a href="../index.html">Docs</a> &raquo;</li>
        
      <li>Unsupervised Crosslingual Embeddings</li>
    
    
      <li class="wy-breadcrumbs-aside">
        
            
        
      </li>
    
  </ul>

  
  <hr/>
</div>
          <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
           <div itemprop="articleBody">
            
  <div class="section" id="unsupervised-crosslingual-embeddings">
<h1>Unsupervised Crosslingual Embeddings<a class="headerlink" href="#unsupervised-crosslingual-embeddings" title="Permalink to this headline">¶</a></h1>
<div class="section" id="overview">
<h2>Overview<a class="headerlink" href="#overview" title="Permalink to this headline">¶</a></h2>
<p>This model uses a GAN to learn a mapping between the embeddings of two languages without supervision, as demonstrated in Word Translation Without Parallel Data <a class="footnote-reference" href="#id2" id="id1">[1]</a>.</p>
<img alt="Word-to-word translation illustration" src="../_images/w2w.png" />
</div>
<div class="section" id="files">
<h2>Files<a class="headerlink" href="#files" title="Permalink to this headline">¶</a></h2>
<ul class="simple">
<li><strong>nlp_architect/data/fasttext_emb.py</strong>: Defines Fasttext object for loading Fasttext embeddings</li>
<li><strong>nlp_architect/models/crossling_emb.py</strong>: Defines GAN for learning crosslingual embeddings</li>
<li><strong>examples/crosslingembs/train.py</strong>: Trains the model and writes final crosslingual embeddings to weight_dir directory.</li>
<li><strong>examples/crosslingembs/evaluate.py</strong>: Defines graph for evaluating the quality of crosslingual embeddings</li>
</ul>
</div>
<div class="section" id="usage">
<h2>Usage<a class="headerlink" href="#usage" title="Permalink to this headline">¶</a></h2>
<p>Main arguments which need to be passed to train.py are</p>
<ul class="simple">
<li><strong>emb_dir</strong>: Directory where Fasttext embeddings are present or need to be downloaded</li>
<li><strong>eval_dir</strong>: Directory where evaluation dictionary is downloaded</li>
<li><strong>weight_dir</strong>: Directory where the mapping weights and final crosslingual embeddings are written</li>
</ul>
<p>Use the following command to run training and generate crosslingual embeddings file:</p>
<div class="code python highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">python</span> <span class="n">train</span><span class="o">.</span><span class="n">py</span> <span class="o">--</span><span class="n">emb_dir</span> <span class="o">&lt;</span><span class="n">embedding</span> <span class="nb">dir</span><span class="o">&gt;</span> <span class="o">--</span><span class="n">eval_dir</span> <span class="o">&lt;</span><span class="n">evaluation</span> <span class="n">data</span><span class="o">&gt;</span> \
  <span class="o">--</span><span class="n">weight_dir</span> <span class="o">&lt;</span><span class="n">save_data</span><span class="o">&gt;</span> <span class="o">--</span><span class="n">epochs</span> <span class="mi">1</span>
</pre></div>
</div>
<div class="section" id="example-usage">
<h3>Example Usage<a class="headerlink" href="#example-usage" title="Permalink to this headline">¶</a></h3>
<p>Make directories for storing downloaded embeddings and multi language evaluation dictionaries</p>
<div class="code bash highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">mkdir</span> <span class="n">data</span>
<span class="n">mkdir</span> <span class="o">-</span><span class="n">p</span> <span class="o">./</span><span class="n">data</span><span class="o">/</span><span class="n">crosslingual</span><span class="o">/</span><span class="n">dictionaries</span>
</pre></div>
</div>
<p>Run training sequence pointing to embedding directory and multi language evaluation dictionaries. After training it will store the mapping weight and new cross lingual embeddings in weight_dir</p>
<div class="code python highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">python</span> <span class="n">train</span><span class="o">.</span><span class="n">py</span> <span class="o">--</span><span class="n">emb_dir</span> <span class="o">./</span><span class="n">data</span> <span class="o">--</span><span class="n">eval_dir</span> <span class="o">./</span><span class="n">data</span><span class="o">/</span><span class="n">crosslingual</span><span class="o">/</span><span class="n">dictionaries</span> <span class="o">--</span><span class="n">weight_dir</span> <span class="o">./</span>
</pre></div>
</div>
</div>
</div>
<div class="section" id="results">
<h2>Results<a class="headerlink" href="#results" title="Permalink to this headline">¶</a></h2>
<p>When trained on English and French embeddings, the results for word-to-word translation accuracy are as follows:</p>
<table border="1" class="colwidths-given docutils">
<colgroup>
<col width="38%" />
<col width="31%" />
<col width="31%" />
</colgroup>
<thead valign="bottom">
<tr class="row-odd"><th class="head">Eval Method</th>
<th class="head">K=1</th>
<th class="head">K=10</th>
</tr>
</thead>
<tbody valign="top">
<tr class="row-even"><td>NN</td>
<td>53.0</td>
<td>74.13</td>
</tr>
<tr class="row-odd"><td>CSLS</td>
<td>81.0</td>
<td>93.0</td>
</tr>
</tbody>
</table>
</div>
<div class="section" id="references">
<h2>References<a class="headerlink" href="#references" title="Permalink to this headline">¶</a></h2>
<table class="docutils footnote" frame="void" id="id2" rules="none">
<colgroup><col class="label" /><col /></colgroup>
<tbody valign="top">
<tr><td class="label"><a class="fn-backref" href="#id1">[1]</a></td><td>Alexis Conneau, Guillaume Lample, Marc’Aurelio Ranzato, Ludovic Denoyer, Hervé Jégou. Word Translation Without Parallel Data. <a class="reference external" href="https://arxiv.org/pdf/1710.04087.pdf">https://arxiv.org/pdf/1710.04087.pdf</a></td></tr>
</tbody>
</table>
<table class="docutils footnote" frame="void" id="id3" rules="none">
<colgroup><col class="label" /><col /></colgroup>
<tbody valign="top">
<tr><td class="label">[2]</td><td>P. Bojanowski, E. Grave, A. Joulin, T. Mikolov. Enriching Word Vectors with Subword Information. <a class="reference external" href="https://arxiv.org/abs/1607.04606">https://arxiv.org/abs/1607.04606</a></td></tr>
</tbody>
</table>
</div>
</div>


           </div>
           
          </div>
          <footer>
  

  <hr/>

  <div role="contentinfo">
    <p>

    </p>
  </div>
  Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.

</footer>

        </div>
      </div>

    </section>

  </div>
  


  <script type="text/javascript">
      // Register a callback with jQuery that switches on the theme's
      // sidebar navigation once the document is ready.
      // NOTE(review): the `true` argument presumably enables sticky
      // navigation — confirm against the theme's theme.js.
      jQuery(function initThemeNavigation() {
          var navigationFlag = true;
          SphinxRtdTheme.Navigation.enable(navigationFlag);
      });
  </script>

  
  
    
   

</body>
</html>